#### Fit the econometric-structural model to each municipality, with
#### rational price expectations.

### Prepare environment

setwd("~/research/coffee-dataverse/src")
do.origspec <- T

library(dplyr)
library(lfe)
library(splines)
library(MASS)
library(rstan)
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

source("load.R", encoding="iso-8859-1")

output.suffix <- paste0("-eq24rational", ifelse(do.origspec, "-orig", "-minx"))

### Core Bayesian model

stan.model.one <- "
data {
  int<lower=0> N; // observations
  int<lower=0> T; // periods
  int<lower=0> M; // regions
  int<lower=0> K; // weather data values for yield
  int<lower=0> L; // weather data values for death

  int<lower=1, upper=T> timestep[N];
  int<lower=0, upper=N-1> back1index[N];
  int<lower=0, upper=N-2> back2index[N];
  int<lower=1, upper=M> region[N];
  int year2000[N];

  // predictors
  matrix[N, K] weather_yield;
  matrix[N, L] weather_death;
  vector[N] price_delayed; // delayed price

  // outcomes
  vector[N] production;
  vector[N] harvest;
}
transformed data {
  vector[N] logproduction = log(production);
  vector[N] logharvest = log(harvest);

  // XXX: DROP death_fe because colinear with planting_fe
  real death_fe[M];
  for (jj in 1:M)
    death_fe[jj] = 0;
}
parameters {
  // Hidden variable bearing
  real logbearing[N];
  real<lower=0> bearing_sigma[M];

  // Expected drivers of bearing
  real<lower=0, upper=1> autoreg;
  real planting_fe; // could be anything

  real<upper=3> yield_fe[M]; // best yield < 20
  real<lower=0> cost_to_planting;

  real<lower=0, upper=100> cost_harvest;

  real scale_trend;
  vector[K] yield_coeff;
  vector<upper=0>[L] death_coeff; // below -178, mean year produces 100% loss

  real<lower=0, upper=.95> yield_uncertainty; // at 1, could be 0 to double; < 1 so log > -inf
  real<lower=0> production_sigma[M];
  real<lower=0> harvest_sigma[M];
}
transformed parameters {
  vector[N] logyield_meanpart;
  vector[N] yield_mean;
  vector<lower=0>[N] yield_range;

  vector[N] harvest_part; // portion harvested due to prices
  vector[N] harvest_yield; // yield of harvested portion
  vector[N] soft_harvest_part; // smoothed harvested portion [0 - 1]
  vector[N] soft_harvest_yield; // smoothed harvested yield [0 - mean_yield]

  vector[N] logdeath_partpart;
  vector[N] death_part; // portion unharvested due to death

  vector[N] bearing_nerlove;
  vector[N] logbearing_mean;

  logyield_meanpart = weather_yield * yield_coeff;
  for (ii in 1:N) {
    yield_mean[ii] = exp(yield_fe[region[ii]] + logyield_meanpart[ii]);
  }
  yield_range = yield_mean * yield_uncertainty;

  harvest_part = (yield_mean + yield_range - cost_harvest ./ price_delayed) ./ (2 * yield_range);
  logdeath_partpart = weather_death * death_coeff;
  for (ii in 1:N) {
    harvest_yield[ii] = ((yield_mean[ii] + yield_range[ii])^2 - (cost_harvest ./ price_delayed[ii])^2) / (4 * yield_range[ii]);
    death_part[ii] = 1 - exp(death_fe[region[ii]] + logdeath_partpart[ii]);
  }

  // handles values outside 0-1
  soft_harvest_part = inv_logit(4 * harvest_part - 2); // may still be 1!
  soft_harvest_yield = inv_logit(4 * harvest_yield ./ yield_mean - 2) .* yield_mean; // max is yield_mean

  for (ii in 1:N) {
     if (timestep[ii] <= 2) {
      bearing_nerlove[ii] = harvest[ii];
    } else {
      bearing_nerlove[ii] = autoreg * bearing_nerlove[back2index[ii]] + planting_fe + cost_to_planting * price_delayed[back1index[ii]];
    }
  }

  for (ii in 1:N) {
    if (timestep[ii] <= 2)
      logbearing_mean[ii] = 0; // not used
    else
      logbearing_mean[ii] = log(bearing_nerlove[ii]) + log(1 - death_part[back1index[ii]]) + log(1 - death_part[back2index[ii]]);
  }
}
model {
  // Distributional models
  for (ii in 1:N) {
    logharvest[ii] ~ normal(logbearing[ii] + log(soft_harvest_part[ii]), harvest_sigma[region[ii]]);
    logproduction[ii] ~ normal(logbearing[ii] + log(soft_harvest_yield[ii]) + scale_trend * year2000[ii], production_sigma[region[ii]]);
    if (timestep[ii] > 2) {
      logbearing[ii] ~ normal(logbearing_mean[ii], bearing_sigma[region[ii]]);
    }
  }
}"

### Prepare data

## Calculate bearing areas
df$bearing.guess <- NA
for (region in unique(df$Munic�pio)) {
    rows <- which(df$Munic�pio == region)
    bearing <- 0
    for (year in min(df$year[rows]):max(df$year[rows])) {
        rr <- rows[which(df$year[rows] == year)]
        bearing <- max(bearing * .85, df$total.harvest[rr])
        df$bearing.guess[rr] <- bearing
    }
}

df$yield.guess <- df$total.produce / df$bearing.guess
df$logyield.guess <- log(df$yield.guess)
df$logyield.guess[!is.finite(df$logyield.guess)] <- NA

olsmod <- felm(as.formula(paste("logyield.guess ~", mainspec.rhs, "| 0 | Munic�pio + year")), data=df)

## Calculate weather-only residuals to get trend prior and FE
##   Determine fe and trend priors across all regions
df$ols.resid <- df$logyield.guess - as.matrix(df[, weatherpreds]) %*% matrix(olsmod$coefficients[1:length(weatherpreds)], length(weatherpreds), 1)
df$year2000 <- df$year - 2000
linmod <- felm(ols.resid ~ year2000 | Munic�pio | 0 | Munic�pio, df)
linfes <- getfe(linmod, se=T, cluster=T, robust=T)

### Calibrate on each region

if (file.exists(paste0("../data/sumstats", output.suffix, ".csv"))) {
    completed <- unique(read.csv(paste0("../data/sumstats", output.suffix, ".csv"))$region)
} else {
    completed <- c()
}
length(completed)

count <- 0
fit <- NA
## region <- sample(df$Munic�pio[df$total.produce > quantile(df$total.produce, .9)], 1)
for (region in unique(df$Munic�pio)) {
    count <- count + 1
    if (region %in% completed)
        next

    completed <- c(completed, region)

    subdf <- df[df$Munic�pio == region & !is.na(df$edd1000),]
    if (nrow(subdf) < 4)
        next

    ## Calculate delayed price
    portion.arabica <- get.portion.arabica.one(subdf)
    prices$year.p1 <- prices$year + 1
    subdf2 <- subdf %>% left_join(prices, by=c('year'='year.p1')) # 2000 gets 1999

    subdf2$price.delayed <- subdf2$price.arabica * portion.arabica + subdf2$price.robusta * (1 - portion.arabica)

    myfes <- sapply(levels(factor(subdf2$Munic�pio)), function(region) linfes$effect[linfes$idx == region])
    myfes.se <- sapply(levels(factor(subdf2$Munic�pio)), function(region) linfes$robustse[linfes$idx == region])

    if (length(myfes) == 1) {
        myfes <- array(myfes, 1)
        myfes.se <- array(myfes.se, 1)
    }

    stan.data <- list(N=nrow(subdf2), T = nrow(subdf2), K = length(weatherpreds), L = 1, M = 1,
                      timestep=1:nrow(subdf2), back1index=0:(nrow(subdf2)-1), back2index=c(0, 0:(nrow(subdf2)-2)),
                      region=rep(1, nrow(subdf2)), year2000=subdf2$year - 2000,
                      weather_yield = subdf2[, weatherpreds],
                      weather_death = as.matrix(subdf2[, c('edd1000')]),
                      price_delayed = subdf2$price.delayed,
                      bearing=subdf2$bearing.guess, # only for stan.model.nobear
                      production = subdf2$total.produce, harvest = subdf2$total.harvest,
                      trend_prior=linmod$coefficients[1], trend_prior_se=linmod$coefficients[1], #linmod$cse[1], - loosen trend prior
                      weather_prior=olsmod$coefficients[1:length(weatherpreds)], weather_prior_vcv=olsmod$vcv[1:length(weatherpreds), 1:length(weatherpreds)],
                      yield_fe_prior=myfes, yield_fe_prior_se=myfes.se)

    ## Estimate for pricememory = 0
    modinit <- lm(bearing.guess ~ lag(price.delayed, 1) + lag(bearing.guess, 2), data=subdf2)

    init <- function() {
        bearing_sigma <- rep(sd(sample(modinit$residuals, length(modinit$residuals), replace=T)), length(unique(subdf2$Munic�pio))) # TODO
        yield_fe <- rnorm(length(unique(subdf2$Munic�pio)), myfes, myfes.se)
        autoreg <- rnorm(1, modinit$coeff[3], sqrt(vcov(modinit)[3, 3]))
        autoreg <- ifelse(autoreg > 1, 1, ifelse(autoreg < 0, 0, autoreg))

        if (length(unique(subdf2$Munic�pio)) == 1) {
            bearing_sigma <- array(bearing_sigma, 1)
            yield_fe <- array(yield_fe, 1)
        }

        list(logbearing=log(subdf2$bearing.guess + rnorm(nrow(subdf2), 0, .15 * df$total.harvest)),
             planting_fe=rnorm(1, modinit$coeff[1], sqrt(vcov(modinit)[1, 1])),
             cost_to_planting=abs(rnorm(1, modinit$coeff[2], sqrt(vcov(modinit)[2, 2]))),
             autoreg=autoreg,
             cost_harvest=runif(1, 0, mean(subdf2$price.delayed)),
             yield_fe=yield_fe,
             scale_trend=rnorm(1, linmod$coefficients[1], linmod$cse[1]),
             yield_coeff=as.numeric(mvrnorm(1, olsmod$coefficients[1:stan.data$K], olsmod$vcv[1:stan.data$K, 1:stan.data$K])))
    }

    if (is.na(fit)) {
        fit <- stan(model_code = stan.model.one, data = stan.data,
                    init=init, iter = 1000, chains = 8, open_progress=F)
    } else {
        fit <- stan(fit=fit, data = stan.data,
                    init=init, iter = 1000, chains = 8, open_progress=F)
    }

    print(fit)

    la <- extract(fit, permute=T)
    if (is.null(la)) {
        if (file.exists(paste0("../data/sumstats", output.suffix, ".csv")))
            write.table(data.frame(param="lp__", region, mean=NA, se_mean=NA, sd=NA, `2.5%`=NA, `25%`=NA, `50%`=NA, `75%`=NA, `97.5%`=NA, neff=NA, Rhat=NA), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=F, col.names=F, append=T)
        else
            write.table(data.frame(param="lp__", region, mean=NA, se_mean=NA, sd=NA, `2.5%`=NA, `25%`=NA, `50%`=NA, `75%`=NA, `97.5%`=NA, neff=NA, Rhat=NA), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=F)
        next
    }

    if (length(weatherpreds) == 6) {
        df.vmc <- data.frame(region, bearing_sigma=la$bearing_sigma, yield_fe=la$yield_fe, planting_fe=la$planting_fe,
                             autoreg=la$autoreg,
                             cost_harvest=la$cost_harvest, cost_to_planting=la$cost_to_planting,
                             scale_trend=la$scale_trend,
                             yield_coeff1=la$yield_coeff[,1],
                             yield_coeff2=la$yield_coeff[,2], yield_coeff3=la$yield_coeff[,3],
                             yield_coeff4=la$yield_coeff[,4], yield_coeff5=la$yield_coeff[,5],
                             yield_coeff6=la$yield_coeff[,6], death_coeff1=la$death_coeff[,1],
                             yield_uncertainty=la$yield_uncertainty,
                             production_sigma=la$production_sigma, harvest_sigma=la$harvest_sigma)
    } else {
        df.vmc <- data.frame(region, bearing_sigma=la$bearing_sigma, yield_fe=la$yield_fe, planting_fe=la$planting_fe,
                             autoreg=la$autoreg,
                             cost_harvest=la$cost_harvest, cost_to_planting=la$cost_to_planting,
                             scale_trend=la$scale_trend,
                             yield_coeff1=la$yield_coeff[,1],
                             yield_coeff2=la$yield_coeff[,2], yield_coeff3=la$yield_coeff[,3],
                             yield_coeff4=la$yield_coeff[,4], yield_coeff5=la$yield_coeff[,5],
                             death_coeff1=la$death_coeff[,1],
                             yield_uncertainty=la$yield_uncertainty,
                             production_sigma=la$production_sigma, harvest_sigma=la$harvest_sigma)
    }

    if (file.exists(paste0("../data/allfits", output.suffix, ".csv")))
        write.table(df.vmc, paste0("../data/allfits", output.suffix, ".csv"), sep=",", row.names=F, col.names=F, append=T)
    else
        write.table(df.vmc, paste0("../data/allfits", output.suffix, ".csv"), sep=",", row.names=F)

    sumstat <- summary(fit)$summary
    if (!file.exists(paste0("../data/sumstats", output.suffix, ".csv"))) {
        write.table(data.frame(param="lp__", region="dummy", mean=NA, se_mean=NA, sd=NA, `2.5%`=NA, `25%`=NA, `50%`=NA, `75%`=NA, `97.5%`=NA, neff=NA, Rhat=NA), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=F)
    }
    write.table(cbind(region, sumstat), paste0("../data/sumstats", output.suffix, ".csv"), sep=",", row.names=T, col.names=F, append=T)
}

